{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pycuda.gpuarray as gpuarray\n",
    "import pycuda.cumath as cmath\n",
    "import pycuda.driver as cuda\n",
    "import pycuda.autoinit\n",
    "import numpy as np\n",
    "import cv2 as cv\n",
    "import math\n",
    "import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CPU ImagePowFunc Section\n",
    "The image data must be a gray scale graph.\n",
    "The for loops always cost lots of time while processing image data.\n",
    "Even just implement a if judge in for loops,the time cost is big."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(256, 256)\n",
      "0.368874788284 s\n"
     ]
    }
   ],
   "source": [
    "I = cv.imread('/home/nvidia-mm/Workspace/frame_diff_py/lena.jpg',cv.IMREAD_GRAYSCALE)\n",
    "I_shape = I.shape\n",
    "print I_shape\n",
    "I = I.astype(np.float32)\n",
    "t_start = time.time()\n",
    "for row in range(I_shape[1]):\n",
    "    for clo in range(I_shape[0]):\n",
    "        I[row][clo] = math.pow(I[row][clo],0.8)\n",
    "I = I.astype(np.uint8)\n",
    "t_end = time.time()\n",
    "print t_end - t_start,'s'\n",
    "cv.imshow('lena',I)\n",
    "while True:\n",
    "    if cv.waitKey(1) & 0xFF == ord('q'):\n",
    "        break\n",
    "cv.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## GPU ImagePowFunc Section\n",
    "We should notice that the SourceModule-func called will change the type of I_gpu Deviceptr to gpuarray if I_gpu has not been changed in SourceModule-func.Then if we want to give a new value to I_gpu,we need to use gpuarrar.tp_gpu() or I_gpu.get() func instead of cuda.memcpy_htod(dest,src) or cuda.memcpy_dtoh(dest,src)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(256, 256)\n",
      "0.00199699401855 s\n"
     ]
    }
   ],
   "source": [
    "from pycuda.compiler import SourceModule\n",
    "I = cv.imread('/home/nvidia-mm/Workspace/frame_diff_py/lena.jpg',cv.IMREAD_GRAYSCALE)\n",
    "I_shape = I.shape\n",
    "print I_shape\n",
    "I = I.astype(np.float32)\n",
    "\n",
    "W = I_shape[0]\n",
    "H = I_shape[1]\n",
    "S = W * H\n",
    "FLOAT_BYTES = S * 4 # uint8 to float\n",
    "\n",
    "w = np.int32(W)\n",
    "h = np.int32(H)\n",
    "p = np.float32(0.8)\n",
    "\n",
    "mod = SourceModule(\"\"\"\n",
    "        __global__ void GrayImgPow(float *A, int w, int h, float p)\n",
    "        {\n",
    "          int idx = blockIdx.x*blockDim.x + threadIdx.x;\n",
    "          int idy = blockIdx.y*blockDim.y + threadIdx.y;\n",
    "          int line;\n",
    "          line = idy*w;\n",
    "          if(idx < w && idy < h){\n",
    "            *(A+line+idx) = pow(*(A+line+idx),p);\n",
    "          }\n",
    "        }\n",
    "        \"\"\")\n",
    "\n",
    "blockDimX = 16\n",
    "blockDimY = 16\n",
    "gridDimX = W / blockDimX\n",
    "gridDimY = H / blockDimY\n",
    "\n",
    "I_gpu = cuda.mem_alloc(FLOAT_BYTES)\n",
    "cuda.memcpy_htod(I_gpu, I) # dest src\n",
    "ImgPow = mod.get_function(\"GrayImgPow\")\n",
    "\n",
    "t_start = time.time()\n",
    "ImgPow(I_gpu, w, h, p, block=(blockDimX,blockDimY,2),grid=(gridDimX,gridDimY))\n",
    "cuda.memcpy_dtoh(I, I_gpu)\n",
    "I = I.astype(np.uint8)\n",
    "t_end = time.time()\n",
    "\n",
    "print t_end - t_start,'s'\n",
    "cv.imshow('lena',I)\n",
    "while True:\n",
    "    if cv.waitKey(1) & 0xFF == ord('q'):\n",
    "        break\n",
    "cv.destroyAllWindows()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CPU ImageGuassFilter Section"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "I = cv.imread('/home/nvidia-mm/Workspace/frame_diff_py/lena.jpg',cv.IMREAD_GRAYSCALE)\n",
    "guass_mask = np.random.normal(loc=4.0,scale=1.0,size=(5,5))\n",
    "print guass_mask"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
